library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1     v purrr   0.3.2
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   0.8.3     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(readxl)
library(gganimate)
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
c2015 <- read_excel(path='c2015.xlsx')
df <- c2015 %>%
  filter_all(~!(.=="Unknown")) %>%
  filter_all(~!(.=="Not Rep")) %>%
  filter_all(~!(.=="Not Reported")) %>%
  filter_all(~!(.==str_detect(.,'Not Rep'))) %>%
  filter_all(~!(.==str_detect(.,'Unknown'))) %>%
  filter_all(~!(.==str_detect(.,'Not Reported'))) %>%
  filter(SEAT_POS=='Front Seat, Left Side') %>%
  mutate(TRAV_SP = as.numeric(substr(TRAV_SP, 1, 3))) %>%
  mutate(AGE = as.numeric(case_when(AGE=="Less than 1"~"0",
                                    TRUE~AGE))) %>%
  mutate(AGE=case_when(is.na(AGE) ~ mean(AGE,na.rm=TRUE),
                       TRUE ~ AGE)) %>%
  mutate(SEX=case_when(is.na(SEX) | SEX=="Not Rep" | SEX=="Unknown" ~ "Female",
                       TRUE~SEX)) %>%
  filter_all(~!is.na(.))
## Warning: NAs introduced by coercion
  1. Continue with Question 15 of Assignment 5. We want to add a moving/transition variable to the grapth using the gganimate package. Do the follows to achieve that
ggplot(df%>%
  group_by(SEX,INJ_SEV,MONTH)%>%
  summarise(avg=mean(TRAV_SP))%>% 
         mutate(speedZ=(avg-mean(avg))/sd(avg))%>% 
         mutate(spType=case_when(speedZ<0~'Below', TRUE~'Above')), 
       aes(x=MONTH, y=speedZ, label=speedZ))+
  geom_bar(stat='identity', aes(fill=spType, width=0.8))+
  scale_fill_manual(name='Monthly Speed',
                    labels = c('Above Average', 'Below Average'),
                    values = c("Above"="#00ba38", "Below"="#f8766d"))+
  labs(subtitle='Normalized average monthly speeds',
       title='Diverging Bars')+
  coord_flip()+
  transition_states(MONTH)+
  labs(title='MONTH={closest_state}')
## Warning: Ignoring unknown aesthetics: width

  1. Plot bar charts of DRINKING filled by SEX with moving (transition) variable MONTH. Can you add the frequency to each bar?
ggplot(df, aes(x=DRINKING, fill=SEX))+
  geom_bar()+
  transition_states(MONTH)+
  labs(title='MONTH={closest_state}')

  1. In this question, we work with the household debt and credit data. Do the follows to import the data to R
ggplot(hdcd, aes(x=Student.Loan, y=Credit.Card))+
  geom_line()

  1. We want to add a moving variable in the graph of 3. The function transition_reveal (link) is great for this. You may tempt to add transition_reveal(Quarter) but notice that transition_reveal does not take the current form of Quarter. Hint: You can create a dummy variable running from 1 to the size of the data and make it the transition variable.
hdcd$observation <- 1:nrow(hdcd) 
ggplot(hdcd, aes(x=Student.Loan, y=Credit.Card))+
  geom_line()+
  transition_reveal(observation)

  1. The Quarter variable is not in the right format (date). Create the date column where the date is the first day of each quarter. Plot the graph of Student.Loan by date. Hint Use: the seq.date functions with the increment being three months.
hdcd$date<-seq.Date(as.Date('2003-01-01'), as.Date('2019-06-01'), by = 'quarter')
ggplot(hdcd, aes(x=date, y=Student.Loan))+
  geom_line()

  1. Add transition_reveal(date) to the plot in Question 5. to reveal the graph by quarters.
ggplot(hdcd, aes(x=date, y=Student.Loan))+
  geom_line()+
  transition_reveal(date)

  1. Use geom_point and geom_text to plot the moving point and the value of the moving points. Hint: geom_point()+ geom_text(aes(label=Student.Loan)) should work.
ggplot(hdcd, aes(x=date, y=Student.Loan))+
  geom_point()+
  transition_reveal(date)+
  geom_line()+
  geom_text(aes(label=Student.Loan))

  1. Include the graphs of other debts to the plot in Question 7, revealing them by date/quarter and differentiating them by colors. Hint: you may want to change the data from long to wide using the gather function.
temp<-hdcd%>%
  gather(Value, key=Debt, c(Mortgage, Credit.Card, Student.Loan, Auto.Loan, HE.Revolving, Other))

ggplot(temp, aes(x=date, y=Value,color=Debt))+
  geom_line()+
  geom_point()+
  geom_text(aes(label=Value))+
  transition_reveal(date)

  1. What is the debt that most correlated with the Total debt. Plot the graph of this debt and the total together revealing by years, differentiation by colors. Plot the remaining debt together in another plot, revealing by years, differentiating by colors. Give a comment on the plots. Label and put captions to the plots.
hdcd %>%
  select(-c(observation,date,Quarter))%>%
  as.matrix()%>%
  rcorr(type="spearman")
##              Mortgage HE.Revolving Auto.Loan Credit.Card Student.Loan
## Mortgage         1.00         0.38      0.39        0.77         0.49
## HE.Revolving     0.38         1.00     -0.36        0.15        -0.22
## Auto.Loan        0.39        -0.36      1.00        0.36         0.76
## Credit.Card      0.77         0.15      0.36        1.00         0.13
## Student.Loan     0.49        -0.22      0.76        0.13         1.00
## Other           -0.04        -0.24     -0.12        0.52        -0.57
## Total            0.93         0.11      0.63        0.71         0.72
##              Other Total
## Mortgage     -0.04  0.93
## HE.Revolving -0.24  0.11
## Auto.Loan    -0.12  0.63
## Credit.Card   0.52  0.71
## Student.Loan -0.57  0.72
## Other         1.00 -0.11
## Total        -0.11  1.00
## 
## n= 66 
## 
## 
## P
##              Mortgage HE.Revolving Auto.Loan Credit.Card Student.Loan
## Mortgage              0.0015       0.0013    0.0000      0.0000      
## HE.Revolving 0.0015                0.0028    0.2206      0.0748      
## Auto.Loan    0.0013   0.0028                 0.0034      0.0000      
## Credit.Card  0.0000   0.2206       0.0034                0.2907      
## Student.Loan 0.0000   0.0748       0.0000    0.2907                  
## Other        0.7550   0.0545       0.3574    0.0000      0.0000      
## Total        0.0000   0.3802       0.0000    0.0000      0.0000      
##              Other  Total 
## Mortgage     0.7550 0.0000
## HE.Revolving 0.0545 0.3802
## Auto.Loan    0.3574 0.0000
## Credit.Card  0.0000 0.0000
## Student.Loan 0.0000 0.0000
## Other               0.3639
## Total        0.3639

Mortgage is the most correlated with the total.

temp2<-hdcd%>%
  gather(Value, key=Debt, c(Mortgage, Credit.Card, Student.Loan, Auto.Loan, HE.Revolving, Other, Total))

temp2 %>%
  filter(Debt=="Mortgage"|Debt=="Total") %>%
  filter(str_detect(Quarter, 'Q1'))%>%
  ggplot(aes(x=date, y=Value,color=Debt))+
  geom_line()+
  geom_point()+
  geom_text(aes(label=Value))+
  transition_reveal(date)+
  labs(title='Mortgage and Total debt by Year',
       subtitle = "Mortgage is the most correlated type of debt with the overall debt.")

temp2 %>%
  filter(Debt != "Mortgage" & Debt != "Total") %>%
  filter(str_detect(Quarter, 'Q1'))%>%
  ggplot(aes(x=date, y=Value,color=Debt))+
  geom_line()+
  geom_point()+
  geom_text(aes(label=Value))+
  transition_reveal(date)+
  labs(title='Non-Mortgage debt sources',
       subtitle = 'These 6 debt types are less correlated with the total debt')

Its unfortunate to see that student loan debt is rising drastically compared to all other debts other than auto. Its interesting to see how correlated the rise in total debt is to mortage debt. They almost move in unison.

  1. Use transition_reveal and transition_states to explore the data of the U.S Economy. Plot three animated plots.
ggplot(temp, aes(x=Debt, y=Value, fill=Debt))+
  geom_bar(stat="identity")+
  transition_states(date)+
  labs(title='{closest_state}')

temp%>%
  ggplot(aes(x=1,y=Value, fill=Debt))+
  geom_bar(stat="identity", position = "fill")+
  transition_states(date)+
  labs(title="Debt by quarter",
       subtitle = '{closest_state}')

temp%>%
  filter(Debt!="Mortgage")%>%
  ggplot(aes(x=date, y=Value,color=Debt))+
  geom_line(position = "jitter", size=6)+
  geom_point(position="jitter")+
  transition_reveal(date)